#!/usr/bin/env python
# -*- coding:utf-8 -*-

"""
@author zyx
@since 2022/2/18 16:25
@file: c03_selenium_动态加载.py
"""

# Fetch the company names listed on the first 5 pages
import time

from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By

# Drive a real Chrome instance, page through the listing by clicking the
# "next page" button, keep each page's HTML, then print the title of every
# list entry found in those pages.
# NOTE(review): `executable_path` is deprecated in Selenium 4 -- confirm the
# installed Selenium version still accepts it before upgrading.
bro = webdriver.Chrome(executable_path=r'D:\Coding\chromedriver\chromedriver.exe')
try:
    url = 'http://scxk.nmpa.gov.cn:81/xk/'
    bro.get(url=url)
    # Fixed pause before reading the source so the page has time to render.
    time.sleep(1)

    # HTML source of the first 5 pages: the landing page plus 4 "next" clicks.
    all_page_text_list = [bro.page_source]
    for _ in range(4):
        # find_element(By.XPATH, ...) is used instead of the legacy
        # find_element_by_xpath helper, which newer Selenium releases removed.
        next_page_btn = bro.find_element(By.XPATH, '//*[@id="pageIto_next"]')
        next_page_btn.click()
        time.sleep(1)
        all_page_text_list.append(bro.page_source)

    for page_text in all_page_text_list:
        # Parse the captured HTML and walk the entries of the #gzlist list.
        tree = etree.HTML(page_text)
        for li in tree.xpath('//*[@id="gzlist"]/li'):
            titles = li.xpath('./dl/@title')
            # Skip entries without a titled <dl> instead of crashing the run.
            if titles:
                print(titles[0])

    time.sleep(2)
finally:
    # Always shut the browser down, even if a lookup, click or parse failed,
    # so no Chrome/chromedriver process is left behind.
    bro.quit()
